### Summary ####
# Input: df_f, raw CE files 
# Output: Summary statistics tables and figures in Section 2
# Outline: 
# Part 1: Summary statistics from the final sample
# 1. Table I Panel A Column I and II
# 2. Table I Panel B Column I and II
# 3. Figure III
# 4. Table II
# Part 2: Summary statistics from raw CE (all interviews of CU interviewed in May, Jun, or Jul)
# 5. Table I Panel A Column III and IV
# 6. Table I Panel B Column III and IV
# IMPORTANT: Run code out of order can lead to different results

### ******** Results from the final panel ******** ####

### Summary ####
# Input: df_f, cnt20
# Output: Summary statistics table I Column 1 and 2, Table II, and Figure III

setwd(getwd())

### Data Processing ####
## Open libraries 
library(readr) # For importing datasets
library(readxl) # For importing datasets
library(dplyr) # For data processing 
library(ggplot2) # For graphs
library(weights) # For weighted summary statistics
library(expss) # For frequency tables


## Import data sets
cnt_cl <- read.csv("cnt_cleaned.csv") %>% select(NEWID,RYYMM,REBTUSED,CHCKEFT,EIP) %>% 
  filter(RYYMM==2012|RYYMM==2101|RYYMM==2102) %>% 
  rename(EIPII=EIP)

df_f <- read.csv("df_f.csv")

## Find the list of NEWIDs in the panel that could have EIPs
df_feb <- df_f %>% filter(YYMM==2102) %>% select(ID, NEWID, FINLWT21_AVG)
df_mar <- df_f %>% filter(YYMM==2103) %>% select(ID, NEWID, FINLWT21_AVG)
df_apr <- df_f %>% filter(YYMM==2104) %>% select(ID, NEWID, FINLWT21_AVG)

df_may <- df_f %>% filter(YYMM==2105) %>% select(ID, NEWID, FINLWT21_AVG)
df_jun <- df_f %>% filter(YYMM==2106) %>% select(ID, NEWID, FINLWT21_AVG)
df_jul <- df_f %>% filter(YYMM==2107) %>% select(ID, NEWID, FINLWT21_AVG)

df_aug <- df_f %>% filter(YYMM==2108) %>% select(ID, NEWID, FINLWT21_AVG)
df_sep <- df_f %>% filter(YYMM==2109) %>% select(ID, NEWID, FINLWT21_AVG)

# Since we consider the first lag, the previous interview of the May CUs should also be counted 
df_may_lag <- df_may
df_may_lag$NEWID <- df_may_lag$NEWID - 1

# similarly for the other months 
df_jun_lag <- df_jun
df_jun_lag$NEWID <- df_jun_lag$NEWID - 1

df_jul_lag <- df_jul
df_jul_lag$NEWID <- df_jul_lag$NEWID - 1

df_aug_lag <- df_aug
df_aug_lag$NEWID <- df_aug_lag$NEWID - 1

df_sep_lag <- df_sep
df_sep_lag$NEWID <- df_sep_lag$NEWID - 1

# bind all newids together and drop repititions

NEWID_list <- bind_rows(df_feb,df_mar,df_apr,df_may,df_may_lag,df_jun,df_jun_lag,df_jul,df_jul_lag,
                        df_aug,df_aug_lag,df_sep,df_sep_lag) %>% distinct(NEWID,.keep_all = TRUE)

EIPII <- merge(NEWID_list,cnt_cl,by="NEWID")

# Checking interviews with multiple EIPs
# rb1 <- EIPII  %>% group_by(NEWID) %>% filter( n() == 1)
# rb2 <- EIPII  %>% group_by(NEWID) %>% filter( n() == 2)
# rb3 <- EIPII  %>% group_by(NEWID) %>% filter( n() == 3)
# rb4 <- EIPII  %>% group_by(NEWID) %>% filter( n() == 4)
# rb5 <- EIPII  %>% group_by(NEWID) %>% filter( n() == 5)
# rb6 <- EIPII  %>% group_by(NEWID) %>% filter( n() == 6)
# rb7 <- EIPII  %>% group_by(NEWID) %>% filter( n() == 7)
# rb8 <- EIPII  %>% group_by(NEWID) %>% filter( n() == 8)

# Aggregating to the CU-month level
# EIPII_m <- EIPII %>% group_by(NEWID,RYYMM) %>%
#   mutate(TEIPII = sum(EIPII)) %>% 
#   distinct(NEWID,RYYMM, .keep_all = TRUE) %>%
#   select(-c(EIPII))

### Table I Panel C Column I and II ####
#### Column I ####
fre(EIPII$RYYMM)

#### Column II ####
fre(EIPII$RYYMM, weight=EIPII$FINLWT21_AVG)

### Table I Panel D Column I and II ####

# Find number of recipients and number of number of non-recipients

df_f_cu <- df_f %>% distinct(ID, .keep_all=TRUE)

# These are the non-recipients 
df_f_cu_nr <- df_f_cu %>% filter(r==0)

#### Column I ####
1992/3918
length(df_f_cu_nr$ID)/length(df_f_cu$ID)

#### Column II ####
sum(df_f_cu_nr$FINLWT21_AVG)/sum(df_f_cu$FINLWT21_AVG)


### Table II ####

fre(EIPII$CHCKEFT, weight=EIPII$FINLWT21_AVG)


fre(EIPII$REBTUSED, weight=EIPII$FINLWT21_AVG)

### Table C.2 ####

# Aggregate to CU-3months level
EIPII_Q <- EIPII %>%
  group_by(NEWID) %>%
  mutate(EIPII_t=sum(EIPII)) %>%
  distinct(NEWID,.keep_all=TRUE) %>%
  ungroup()

table(cut(EIPII_Q$EIPII_t,breaks=c(1,599.9, 600.1,1199.9,1200.1,1799.9,1800.1,2399.9,2400.1,2999.9,3000.1,
                                100000)))


df_f_cu_p <- df_f %>% group_by(ID) %>%
  mutate(EIPII_t_total=sum(EIPII_t))

df_f_cu_p <- df_f_cu_p %>% group_by(ID) %>%
  mutate(EIPII_tm1_total=sum(EIPII_tm1))

df_f_cu_p$EIPII_total <- df_f_cu_p$EIPII_t_total + df_f_cu_p$EIPII_tm1_total

df_f_cu_p <- df_f_cu_p %>% distinct(ID, .keep_all=TRUE)

df_f_cu_nr_p <- df_f_cu_p %>% filter(EIPII_total==0)

# Note that we know that there are 2035 non-recipients, so EIP = 0
2035/3940
# 0<EIP<600
62/3940
# EIP = 600
671/3940
# 600 < EIP < 1200
55/3940
# EIP = 1200
604/3940
# 1200 < EIP < 1800
44/3940
# EIP = 1800
190/3940
# 1800 < EIP < 2400
29/3940
# EIP = 2400
116/3940
# 2400 < EIP < 3000
22/3940
# EIP = 3000
48/3940
# EIP > 3000
64/3940

# weighted numbers 
sum(df_f_cu_nr_p$FINLWT21_AVG)
# 0<EIP<1200
EIPII_Q_1 <- EIPII_Q %>% filter(EIPII_t>0 & EIPII_t <600)
sum(EIPII_Q_1$FINLWT21_AVG)
# EIP = 1200
EIPII_Q_2 <- EIPII_Q %>% filter(EIPII_t==600)
sum(EIPII_Q_2$FINLWT21_AVG)
# 1200 < EIP < 1700
EIPII_Q_3 <- EIPII_Q %>% filter(EIPII_t>600 & EIPII_t <1200)
sum(EIPII_Q_3$FINLWT21_AVG)
# EIP = 1700
EIPII_Q_4 <- EIPII_Q %>% filter(EIPII_t==1200)
sum(EIPII_Q_4$FINLWT21_AVG)
# 1700 < EIP < 2400
EIPII_Q_5 <- EIPII_Q %>% filter(EIPII_t>1200 & EIPII_t <1800)
sum(EIPII_Q_5$FINLWT21_AVG)
# EIP = 2400
EIPII_Q_6 <- EIPII_Q %>% filter(EIPII_t==1800)
sum(EIPII_Q_6$FINLWT21_AVG)
# 2400 < EIP < 2900
EIPII_Q_7 <- EIPII_Q %>% filter(EIPII_t>1800 & EIPII_t <2400)
sum(EIPII_Q_7$FINLWT21_AVG)
# EIP = 2900
EIPII_Q_8 <- EIPII_Q %>% filter(EIPII_t==2400)
sum(EIPII_Q_8$FINLWT21_AVG)
# 2900 < EIP < 3400
EIPII_Q_9 <- EIPII_Q %>% filter(EIPII_t>2400 & EIPII_t<3000)
sum(EIPII_Q_9$FINLWT21_AVG)
# EIP = 2900
EIPII_Q_10 <- EIPII_Q %>% filter(EIPII_t==3000)
sum(EIPII_Q_10$FINLWT21_AVG)
# 2900 < EIP < 3400
EIPII_Q_11 <- EIPII_Q %>% filter(EIPII_t>3000)
sum(EIPII_Q_11$FINLWT21_AVG)


sum(df_f_cu_nr_p$FINLWT21_AVG) + sum(EIPII_Q_1$FINLWT21_AVG) + sum(EIPII_Q_2$FINLWT21_AVG)+
  sum(EIPII_Q_3$FINLWT21_AVG) + sum(EIPII_Q_4$FINLWT21_AVG) + sum(EIPII_Q_5$FINLWT21_AVG) + sum(EIPII_Q_6$FINLWT21_AVG) + 
  sum(EIPII_Q_7$FINLWT21_AVG) + sum(EIPII_Q_8$FINLWT21_AVG) + sum(EIPII_Q_9$FINLWT21_AVG) + sum(EIPII_Q_10$FINLWT21_AVG) + 
  sum(EIPII_Q_11$FINLWT21_AVG) + 

sum(df_f_cu_nr_p$FINLWT21_AVG)/101014409
sum(EIPII_Q_1$FINLWT21_AVG)/101014409
sum(EIPII_Q_2$FINLWT21_AVG)/101014409
sum(EIPII_Q_3$FINLWT21_AVG)/101014409
sum(EIPII_Q_4$FINLWT21_AVG)/101014409
sum(EIPII_Q_5$FINLWT21_AVG)/101014409
sum(EIPII_Q_6$FINLWT21_AVG)/101014409
sum(EIPII_Q_7$FINLWT21_AVG)/101014409
sum(EIPII_Q_8$FINLWT21_AVG)/101014409
sum(EIPII_Q_9$FINLWT21_AVG)/101014409
sum(EIPII_Q_10$FINLWT21_AVG)/101014409
sum(EIPII_Q_11$FINLWT21_AVG)/101014409

# average EIP 
summary(EIPII_Q$EIPII_t)
weighted.mean(EIPII_Q$EIPII_t,EIPII_Q$FINLWT21_AVG)

sd(EIPII_Q$EIPII_t)

### Table C.6 ####
# Drop high income
df_f$MARITAL_t <- ifelse(df_f$MARITAL1_t == 1, 1, 0)


# For single, without kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 75000 & FINCBTXM_FST > 50000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 100000 & FINCBTXM_FST > 75000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 125000 & FINCBTXM_FST > 100000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)

# For single, with kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 150000 & FINCBTXM_FST > 125000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t ==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)


# For married couple, no kids
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 175000 & FINCBTXM_FST > 150000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)


# For married couple, with kids
# 
check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==1 & NUM_KIDS_t >0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)


# For adults, no kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 200000 & FINCBTXM_FST > 175000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t ==0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)

# For adults, with kids

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 225000 & FINCBTXM_FST > 200000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 250000 & FINCBTXM_FST > 225000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 275000 & FINCBTXM_FST > 250000)
table(check$r)

check <- df_f %>% filter(MARITAL_t==0 & NUM_ADTS_t > 1 & NUM_KIDS_t > 0 & FINCBTXM_FST < 300000 & FINCBTXM_FST > 275000)
table(check$r)


### ******** Results from raw CE ******** ####

### Summary ####
# Input: cnt20 and fmli193 -- fmli204
# Output: Summary statistics table I column 3 and 4

### Data Processing ####
# Import datasets
cnt20 <- read_excel("Raw data/cnt20.xlsx") %>% filter(CONTCODE==800) %>% 
  filter(CONTMO==12|CONTMO==1|CONTMO==2)
cnt21 <- read_excel("Raw data/cnt21.xlsx") %>% filter(CONTCODE==800) %>% 
  filter(CONTMO==1|CONTMO==2)
cnt <- bind_rows(cnt20,cnt21)

fmli202 <- read_excel("Raw data/fmli202.xlsx")
fmli203 <- read_excel("Raw data/fmli203.xlsx")
fmli204 <- read_excel("Raw data/fmli204.xlsx")
fmli211 <- read_excel("Raw data/fmli211.xlsx")
fmli212 <- read_excel("Raw data/fmli212.xlsx")
fmli213 <- read_excel("Raw data/fmli213.xlsx")

####  Obtain interviews with rebates from cnt20
cnt_rc <- cnt  %>%
  # rename CONTMO
  mutate(RYYMM = ifelse(CONTMO==12,2012,
                        ifelse(CONTMO==1,2101,2102))) %>% 
  select(NEWID,RYYMM,CHCKEFT,REBTUSED,CONTEXPX) %>%
  # Rename CONTEXPX
  rename(EIPII = CONTEXPX)

#### Obtain all interviews from fmli's
fmli <- bind_rows(fmli211,fmli212,fmli213) %>% 
  select(NEWID,QINTRVMO) %>%
  mutate(YYMM=ifelse(QINTRVMO==1,2101,
                     ifelse(QINTRVMO==2, 2102,
                            ifelse(QINTRVMO==3, 2103,
                                   ifelse(QINTRVMO==4,2104,
                                          ifelse(QINTRVMO==5,2105,
                                                 ifelse(QINTRVMO==6,2106,
                                                        ifelse(QINTRVMO==7,2107,
                                                               ifelse(QINTRVMO==8,2108,2109))))))))) %>%
  filter(YYMM!=2101) %>% 
  select(-c(QINTRVMO))

fmli_flt <- fmli %>% 
  select (NEWID) %>% 
  mutate(
    RYYMM = NA,
    CHCKEFT = NA,
    REBTUSED = NA,
    EIPII = 0
  )

#### Obtain interview without rebates 
# cnt20_nr contains all interviews without rebates reported 
cnt_nr <- fmli_flt %>% filter(!(NEWID %in% cnt_rc$NEWID))

# Merge to form a cnt20_f that contains all information about rebates
cnt_f <- rbind(cnt_rc,cnt_nr)

#### Merge df with fmli (fmli193 to fmli204)
# df now contains all EIPI information as well as other info already in fmli
df <- merge(fmli,cnt_f,by="NEWID")

# Note that cnt20_f has 100 more observations than df 
# These are November rebates reported in Feb 2021

####  Keep only CUs that are interviewed in Feb, March, and April 
df <- df %>% mutate(
  ID = substr(as.character(NEWID),1,6))

df_all <- df

feb_list <- df %>% filter(YYMM==2102) %>% select(ID)
mar_list <- df %>% filter(YYMM==2103) %>% select(ID)
apr_list <- df %>% filter(YYMM==2104) %>% select(ID)

df <- df %>%
  filter(ID %in% feb_list$ID | ID %in% mar_list$ID | ID %in% apr_list$ID) 

#### Obtain weights 
fmli202_wts <- fmli202 %>% select(NEWID,FINLWT21)
fmli203_wts <- fmli203 %>% select(NEWID,FINLWT21)
fmli204_wts <- fmli204 %>% select(NEWID,FINLWT21)
fmli211_wts <- fmli211 %>% select(NEWID,FINLWT21)
fmli212_wts <- fmli212 %>% select(NEWID,FINLWT21)
fmli213_wts <- fmli213 %>% select(NEWID,FINLWT21)

ID_creator <- function(fmli){
  fmli_weights_income <- fmli %>% 
    mutate(
      ID = substr(as.character(NEWID),1,6),
    ) %>% select(ID, FINLWT21)
  return(fmli_weights_income)}

fmli202_wts <- ID_creator(fmli202)
fmli203_wts <- ID_creator(fmli203)
fmli204_wts <- ID_creator(fmli204)
fmli211_wts <- ID_creator(fmli211) 
fmli212_wts <- ID_creator(fmli212)
fmli213_wts <- ID_creator(fmli213) 

# Merge to obtain the weights, income, and liquidity in each interview 
wts <- merge(fmli202_wts,fmli203_wts,by="ID",all=TRUE)

wts <- wts %>% rename(
  FINLWT21_202 = FINLWT21.x, FINLWT21_203 = FINLWT21.y)

wts <- merge(wts,fmli204_wts, by="ID", all=TRUE)
wts <- wts %>% rename(
  FINLWT21_204 = FINLWT21)

wts <- merge(wts,fmli211_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_211 = FINLWT21)

wts <- merge(wts,fmli212_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_212 = FINLWT21)

wts <- merge(wts,fmli213_wts,by="ID",all=TRUE)
wts <- wts %>% rename(
  FINLWT21_213 = FINLWT21)

# Average weights 
wts$FINLWT21_AVG <- rowMeans(wts[,c(
                                    "FINLWT21_202",
                                    "FINLWT21_203",
                                    "FINLWT21_204",
                                    "FINLWT21_211",
                                    "FINLWT21_212",
                                    "FINLWT21_213")], 
                             na.rm=TRUE)

wts <- wts %>% select(ID,FINLWT21_AVG)

#### merge with weights 
df <- merge(df,wts,by="ID")

#### find only interviews with rebates
df_r <- df %>% filter(EIPII>0)

#### Aggregating to the CU-month level
# df_r <- df_r %>% group_by(NEWID,RYYMM) %>%
#   mutate(TEIPII = sum(EIPII)) %>% 
#   distinct(NEWID,RYYMM, .keep_all = TRUE) %>%
#   select(-c(EIPII))

### Table I Panel C Column III and IV ####

#### Column III ####
fre(df_r$RYYMM)

#### Column IV ####
fre(df_r$RYYMM,weight=df_r$FINLWT21_AVG)

### Table I Panel D Column III and IV ####

df_cu <- df %>%
  group_by(ID) %>%
  mutate(TotalEIPII=sum(EIPII),
         r = ifelse(TotalEIPII>0,1,0)) %>% 
  distinct(ID,.keep_all = TRUE)

df_cu_nr <- df_cu %>% filter(r==0)

#### Column III ####
2722/5213

#### Column IV ####
sum(df_cu_nr$FINLWT21_AVG)/sum(df_cu$FINLWT21_AVG)

### Total number and dollar amount of EIPs ####
#### merge with weights 
df_all <- merge(df_all,wts,by="ID")

#### Find only interviews with rebates
df_all_r <- df_all %>% filter(EIPII>0)

# Total number of rebates
fre(df_all_r$RYYMM,weight=df_all_r$FINLWT21_AVG)


# Total amount of payments
sum(df_all_r$EIPII*df_all_r$FINLWT21_AVG)
